Setup, load packages, import data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RSQLite)
library(ggplot2)
library(stringr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(forcats)
conn <- dbConnect(RSQLite::SQLite(), "data/spacemissions.sqlite")
dbExecute(conn, "UPDATE spacemissions SET Organisation = REPLACE(Organisation, 'Arm??e de l''Air', 'Armée de l''Air') WHERE Organisation LIKE '%Arm??e de l''Air%'")
## [1] 0
dbGetQuery(conn,"PRAGMA table_info(spacemissions)")
## cid name type notnull dflt_value pk
## 1 0 Organisation TEXT 0 NA 0
## 2 1 Location TEXT 0 NA 0
## 3 2 Date TEXT 0 NA 0
## 4 3 Detail TEXT 0 NA 0
## 5 4 Rocket_Status TEXT 0 NA 0
## 6 5 Price TEXT 0 NA 0
## 7 6 Mission_Status TEXT 0 NA 0
df <- dbGetQuery(conn, "SELECT * FROM spacemissions")
Bar charts & histograms
Launches per organisation
dbExecute(conn, "DROP TABLE IF EXISTS organisation_counts")
## [1] 0
dbExecute(conn, "
CREATE TABLE organisation_counts (
organisation VARCHAR(255),
organisation_count INT
);
")
## [1] 0
dbExecute(conn, "
INSERT INTO organisation_counts (organisation, organisation_count)
SELECT Organisation, COUNT(*) AS organisation_count
FROM spacemissions
GROUP BY Organisation;
")
## [1] 56
organisation_counts <- dbGetQuery(conn, "SELECT * FROM organisation_counts ORDER BY organisation_count ASC")
organisation_counts <- organisation_counts %>%
mutate(organisation = factor(organisation, levels = organisation))
ggplot(organisation_counts, aes(x=organisation, y=organisation_count)) +
geom_segment( aes(x=organisation, xend=organisation, y=0, yend=organisation_count), color="skyblue") +
geom_point( color="blue", size=4, alpha=0.6) +
theme_light() +
coord_flip() +
labs(
x = "Organisation",
y = "Number of missions",
title = "Missions per organisation"
) +
theme(
panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
)

Rocket statuses
dbExecute(conn, "DROP TABLE IF EXISTS status_counts")
## [1] 0
dbExecute(conn, "
CREATE TABLE status_counts (
status VARCHAR(255),
count INT
);
")
## [1] 0
dbExecute(conn, "
INSERT INTO status_counts (status, count)
SELECT Rocket_Status, COUNT(*) AS count
FROM spacemissions
GROUP BY Rocket_Status;
")
## [1] 2
status_counts <- dbGetQuery(conn, "SELECT * FROM status_counts ORDER BY count ASC")
status_counts <- status_counts %>%
mutate(status = recode(status,
"StatusActive" = "Active",
"StatusRetired" = "Retired"))
ggplot(status_counts, aes(x=status, y=count)) +
geom_bar(stat="identity", fill=rgb(0.1,0.4,0.5)) +
labs(x="Rocket status",
y="Number of rockets",
title="Rocket status")

Mission costs histogram
mission_costs <- dbGetQuery(conn, "SELECT Price FROM spacemissions")
tidy_costs <- function(costs_vector) {
costs_vector$Price <- as.numeric(gsub(",", "", costs_vector$Price))
costs_vector <- na.omit(costs_vector)
return(costs_vector)
}
mission_costs <- tidy_costs(mission_costs)
ggplot(mission_costs, aes(x=Price)) +
geom_histogram(binwidth=100) +
labs(x="Cost (millions of USD)",
y="Count")

Choropleth maps
Setup countries shapefile
library(countrycode)
library(stringr)
# download countries geojson
tmp_geojson <- tempfile(fileext = ".geojson")
download.file(
"https://raw.githubusercontent.com/datasets/geo-countries/cd9e0635901eac20294a57ee3b3ce0684d5e3f1a/data/countries.geojson",
tmp_geojson
)
# read contents
library(sf)
## Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
countries_sf <- read_sf(tmp_geojson)
Fetch ISO3 codes for each location in dataframe
locations <- df$Location
pattern <- ".*,\\s*(.*)"
locations <- str_match(locations, pattern)[, 2]
country_mappings <- c(
"Russia" = "RUS",
"New Mexico" = "USA",
"Yellow Sea" = "CHN",
"Shahrud Missile Test Site" = "IRN",
"Pacific Missile Range Facility" = "USA",
"Barents Sea" = "RUS",
"Gran Canaria" = "USA"
)
get_iso3_code <- function(country) {
if (country %in% names(country_mappings)) {
return(country_mappings[[country]])
} else if (grepl("Pacific Ocean", country, ignore.case = TRUE)) {
return(NA)
} else {
iso3_code <- countrycode(country, "country.name", "iso3c")
# dynamically update country_mappings to run code faster
country_mappings <<- c(country_mappings, setNames(iso3_code, country))
return(countrycode(country, "country.name", "iso3c"))
}
}
iso3_codes <- lapply(locations, get_iso3_code)
df$ISO_A3 <- iso3_codes
Successes choropleth
success_total <- df %>%
filter(Mission_Status == "Success") %>%
group_by(ISO_A3) %>%
summarise(total_successes = n())
success_shapefile <- merge(countries_sf, success_total, by = "ISO_A3", all.x = TRUE)
#plot choropleth
ggplot(success_shapefile) +
geom_sf(aes(fill = total_successes), linewidth = 0.1, alpha = 0.9) +
theme_classic() +
scale_fill_viridis_c(
trans = "log", breaks = c(1, 5, 10, 100, 500, 1000),
name = "Number of successful mission launches",
guide = guide_legend(
keyheight = unit(3, units = "mm"),
keywidth = unit(12, units = "mm"),
label.position = "bottom",
title.position = "top",
nrow = 1
)
) +
theme(legend.position = "bottom") +
labs(
title = "Successful space missions by country"
)

Failure choropleth
failure_total <- df %>%
filter(str_detect(Mission_Status, "Failure")) %>%
group_by(ISO_A3) %>%
summarise(total_failures = n())
failure_shapefile <- merge(countries_sf, failure_total, by = "ISO_A3", all.x = TRUE)
ggplot(failure_shapefile) +
geom_sf(aes(fill = total_failures), linewidth = 0.1, alpha = 0.9) +
theme_classic() +
scale_fill_viridis_c(
trans = "log", breaks = c(1, 5, 10, 50, 100, 150),
name = "Number of failed mission launches",
guide = guide_legend(
keyheight = unit(3, units = "mm"),
keywidth = unit(12, units = "mm"),
label.position = "bottom",
title.position = "top",
nrow = 1
)
) +
theme(legend.position = "bottom") +
labs(
title = "Failed space missions by country"
)

Plotly sunburst chart
get_values <- function(parents, labels) {
if (length(parents) == 0 || parents == "") {
return(sum(df$ISO_A3 == labels))
} else if (grepl("-", parents)) {
return(sum(df$ISO_A3 == sub("-.*", "", parents) & df$Organisation == sub(".*-", "", parents) & df$Mission_Status == labels))
} else {
return(sum(df$Organisation == labels & df$ISO_A3 == parents))
}
}
get_country_data <- function(country) {
organisations <- unique(subset(df, ISO_A3 == country, select = Organisation))$Organisation
organisations <- paste0(country, "-", organisations)
status_codes <- c("success", "fail", "partialfail", "prelaunchfail")
organisation_codes <- expand.grid(organisation = organisations, item = status_codes)
organisation_codes <- paste0(organisation_codes$organisation, "-", organisation_codes$item)
temp_df <- data.frame(
ids=c(country, organisations, organisation_codes)
)
temp_df$labels <- sub(".*-", "", temp_df$ids)
temp_df <- temp_df %>%
mutate(
labels = case_when(
grepl("prelaunchfail", ids, ignore.case = TRUE) ~ "Prelaunch Failure",
grepl("success", ids, ignore.case = TRUE) ~ "Success",
grepl("partialfail", ids, ignore.case = TRUE) ~ "Partial Failure",
grepl("fail", ids, ignore.case = TRUE) ~ "Failure",
TRUE ~ labels
)
)
temp_df$parents <- ifelse(
str_count(temp_df$ids, "-") == 0,
"",
ifelse(
str_count(temp_df$ids, "-") == 1,
sub("-.*", "", temp_df$ids),
sub("-(?!.*-).*", "", temp_df$ids, perl=TRUE)
)
)
temp_df$values <- mapply(get_values, temp_df$parents, temp_df$labels)
return(temp_df)
}
valid_countries <- success_total$ISO_A3[!is.na(success_total$ISO_A3)]
sunburst_df <- do.call(rbind, lapply(valid_countries, get_country_data))
sunburst_df$values <- as.numeric(sunburst_df$values)
sunburst_df <- sunburst_df[sunburst_df$values != 0, ]
fig <- plot_ly()
fig <- fig %>% add_trace(
type='sunburst',
ids=sunburst_df$ids,
labels=sunburst_df$labels,
parents=sunburst_df$parents,
values=sunburst_df$values,
branchvalues='total',
insidetextorientation='radial'
)
fig
Analysing spends
df$Price <- as.numeric(gsub(",", "", df$Price))
total_price_by_organisation <- df %>%
group_by(Organisation) %>%
summarise(Total_Price = sum(Price, na.rm = TRUE)) %>%
filter(Total_Price!=0) %>%
arrange(Total_Price)
total_price_by_organisation <- total_price_by_organisation %>%
mutate(Organisation = factor(Organisation, levels = Organisation))
ggplot(total_price_by_organisation, aes(x=Organisation, y=Total_Price)) +
geom_segment( aes(x=Organisation, xend=Organisation, y=0, yend=Total_Price), color="skyblue") +
geom_point( color="blue", size=4, alpha=0.6) +
theme_light() +
coord_flip() +
labs(
x = "Organisation",
y = "Total spend (millions of USD)",
title = "Total expenditure on missions per organisation"
) +
theme(
panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
)

mean_price_by_organisation <- df %>%
group_by(Organisation) %>%
summarise(Mean_Price = mean(Price, na.rm = TRUE)) %>%
arrange(Mean_Price)
mean_price_by_organisation <- na.omit(mean_price_by_organisation)
mean_price_by_organisation %>%
mutate(Organisation = fct_reorder(Organisation, Mean_Price)) %>%
ggplot(aes(x=Organisation, y=Mean_Price)) +
geom_bar(stat = "identity") +
coord_flip() +
labs(
title = "Mean cost of mission per organisation",
x = "Organisation",
y = "Mean cost per mission (millions of USD)"
)
